R Markdown

##  [1] "macedonia"                        "united states of america"        
##  [3] "somaliland"                       "republic of serbia"              
##  [5] "swaziland"                        "united republic of tanzania"     
##  [7] "the bahamas"                      "democratic republic of the congo"
##  [9] "republic of congo"                "northern cyprus"                 
## [11] "antarctica"                       "guinea bissau"                   
## [13] "kosovo"
##                     region continent population
## 1                    china      Asia 1433783686
## 2                    india      Asia 1366417754
## 3 united states of america  Americas  329064917
## Parsed with column specification:
## cols(
##   Date = col_date(format = ""),
##   `Country/Region` = col_character(),
##   `Province/State` = col_character(),
##   Lat = col_double(),
##   Long = col_double(),
##   Confirmed = col_double(),
##   Recovered = col_double(),
##   Deaths = col_double()
## )
## Parsed with column specification:
## cols(
##   Entity = col_character(),
##   Date = col_date(format = ""),
##   `Source URL` = col_character(),
##   `Source label` = col_character(),
##   Notes = col_character(),
##   `Cumulative total` = col_double(),
##   `Daily change in cumulative total` = col_double(),
##   `Cumulative total per thousand` = col_double(),
##   `Daily change in cumulative total per thousand` = col_double(),
##   `3-day rolling mean daily change` = col_double(),
##   `3-day rolling mean daily change per thousand` = col_double()
## )
##  [1] "macedonia"                        "myanmar"                         
##  [3] "united states of america"         "north korea"                     
##  [5] "solomon islands"                  "somaliland"                      
##  [7] "republic of serbia"               "swaziland"                       
##  [9] "turkmenistan"                     "east timor"                      
## [11] "taiwan"                           "united republic of tanzania"     
## [13] "the bahamas"                      "vanuatu"                         
## [15] "ivory coast"                      "democratic republic of the congo"
## [17] "republic of congo"                "northern cyprus"                 
## [19] "czech republic"                   "antarctica"                      
## [21] "guinea bissau"                    "south korea"                     
## [23] "lesotho"
## # A tibble: 101 x 6
## # Groups:   region [1]
##    region Date       confirmed recovered deaths actives
##    <chr>  <date>         <dbl>     <dbl>  <dbl>   <dbl>
##  1 canada 2020-01-22         0         0      0       0
##  2 canada 2020-01-23         0         0      0       0
##  3 canada 2020-01-24         0         0      0       0
##  4 canada 2020-01-25         0         0      0       0
##  5 canada 2020-01-26         1         0      0       1
##  6 canada 2020-01-27         1         0      0       1
##  7 canada 2020-01-28         2         0      0       2
##  8 canada 2020-01-29         2         0      0       2
##  9 canada 2020-01-30         2         0      0       2
## 10 canada 2020-01-31         4         0      0       4
## # ... with 91 more rows
## [1] "hong kong"     "serbia"        "united states"
## # A tibble: 3 x 8
## # Groups:   region [3]
##   Date       region confirmed recovered deaths actives cumulative_test
##   <date>     <chr>      <dbl>     <dbl>  <dbl>   <dbl>           <dbl>
## 1 2020-01-22 denma~         0         0      0       0              NA
## 2 2020-01-25 cabo ~         0         0      0       0              NA
## 3 2020-03-18 unite~       113        26      0      87              NA
## # ... with 1 more variable: population <dbl>
## [1] "united states" "korea, south"
## # A tibble: 3 x 8
## # Groups:   region [3]
##   Date.x     region confirmed recovered deaths actives population
##   <date>     <chr>      <dbl>     <dbl>  <dbl>   <dbl>      <dbl>
## 1 2020-05-01 unite~   1103461    164015  64943  874503  329064917
## 2 2020-05-01 unite~    178685       892  27583  150210   67530172
## 3 2020-05-01 italy     207428     78249  28236  100943   60550075
## # ... with 1 more variable: cumulative_test <dbl>
## # A tibble: 3 x 3
## # Groups:   region [3]
##   region                   actives ratio_active
##   <chr>                      <dbl>        <dbl>
## 1 united states of america  874503        0.266
## 2 united kingdom            150210        0.222
## 3 italy                     100943        0.167
## # A tibble: 3 x 3
## # Groups:   region [3]
##   region     actives ratio_active
##   <chr>        <dbl>        <dbl>
## 1 san marino     457        1.35 
## 2 qatar        12648        0.447
## 3 andorra        234        0.303

Further research on testing: the ratio of confirmed cases to tests performed.

# National (US) data: load the state.regions table and the NYT state-level series.
data("state.regions")
US.time_covid <- read_csv(
  file = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
)
## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   state = col_character(),
##   fips = col_character(),
##   cases = col_double(),
##   deaths = col_double()
## )
# Print the state lookup table (region name, abbreviation, FIPS codes) for reference.
state.regions
##                  region abb fips.numeric fips.character
## 1                alaska  AK            2             02
## 2               alabama  AL            1             01
## 3              arkansas  AR            5             05
## 4               arizona  AZ            4             04
## 5            california  CA            6             06
## 6              colorado  CO            8             08
## 7           connecticut  CT            9             09
## 8  district of columbia  DC           11             11
## 9              delaware  DE           10             10
## 10              florida  FL           12             12
## 11              georgia  GA           13             13
## 12               hawaii  HI           15             15
## 13                 iowa  IA           19             19
## 14                idaho  ID           16             16
## 15             illinois  IL           17             17
## 16              indiana  IN           18             18
## 17               kansas  KS           20             20
## 18             kentucky  KY           21             21
## 19            louisiana  LA           22             22
## 20        massachusetts  MA           25             25
## 21             maryland  MD           24             24
## 22                maine  ME           23             23
## 23             michigan  MI           26             26
## 24            minnesota  MN           27             27
## 25             missouri  MO           29             29
## 26          mississippi  MS           28             28
## 27              montana  MT           30             30
## 28       north carolina  NC           37             37
## 29         north dakota  ND           38             38
## 30             nebraska  NE           31             31
## 31        new hampshire  NH           33             33
## 32           new jersey  NJ           34             34
## 33           new mexico  NM           35             35
## 34               nevada  NV           32             32
## 35             new york  NY           36             36
## 36                 ohio  OH           39             39
## 37             oklahoma  OK           40             40
## 38               oregon  OR           41             41
## 39         pennsylvania  PA           42             42
## 40         rhode island  RI           44             44
## 41       south carolina  SC           45             45
## 42         south dakota  SD           46             46
## 43            tennessee  TN           47             47
## 44                texas  TX           48             48
## 45                 utah  UT           49             49
## 46             virginia  VA           51             51
## 47              vermont  VT           50             50
## 48           washington  WA           53             53
## 49            wisconsin  WI           55             55
## 50        west virginia  WV           54             54
## 51              wyoming  WY           56             56
# Align the key column with state.regions: call it "region" and lower-case its values.
names(US.time_covid) <- replace(
  names(US.time_covid),
  names(US.time_covid) == "state",
  "region"
)
US.time_covid$region <- tolower(x = US.time_covid$region)

# Distinct values of the region column in the time series.
time_state= unique(US.time_covid$region)
# Lay those values out as a one-column data frame, one row per distinct region.
# NOTE(review): for an unnamed one-column matrix, data.frame() appears to derive
# the column name from the text of this call -- confirm before rewording it
# (e.g. replacing the shorthand `T` with `TRUE`), since the name would change.
df2 <- data.frame(matrix(unlist(time_state), nrow=length(time_state), byrow=T))

# Check that the state names line up: join the two region columns, then list
# any state.regions name that found no partner in the time series.
matched3 <- state.regions["region"] %>%
  inner_join(US.time_covid["region"], by = "region")
rename3 <- setdiff(x = state.regions$region, y = matched3$region)
rename3 # an empty result means every state.regions name matched; no renaming needed.
## character(0)
# Drop the FIPS code column from the series.
US.time_covid_final <- select(US.time_covid, -"fips")

# Save the result to disk, then show three randomly chosen rows as a spot check.
write.csv(x = US.time_covid_final, file = "US_covid19_timeseries.csv")
US.time_covid_final[sample.int(nrow(US.time_covid_final), size = 3), ]
## # A tibble: 3 x 4
##   date       region       cases deaths
##   <date>     <chr>        <dbl>  <dbl>
## 1 2020-04-15 rhode island  3529     87
## 2 2020-04-13 arizona       3702    122
## 3 2020-04-08 wyoming        230      0
# National death counts from the CDC; landing page:
# "https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm"

N_death_detail <- read_csv(
  file = "https://data.cdc.gov/api/views/hc4f-j6nb/rows.csv?accessType=DOWNLOAD&bom=true&format=true"
)
## Parsed with column specification:
## cols(
##   `Data as of` = col_character(),
##   Group = col_character(),
##   State = col_character(),
##   Indicator = col_character(),
##   `Start week` = col_character(),
##   `End week` = col_character(),
##   `All COVID-19 Deaths (U07.1)` = col_number(),
##   `Deaths from All Causes` = col_number(),
##   `Percent of Expected Deaths` = col_double(),
##   `All Pneumonia Deaths (J12.0-J18.9)` = col_number(),
##   `Deaths with Pneumonia and COVID-19 (J12.0-J18.9 and U07.1)` = col_number(),
##   `All Influenza Deaths (J09-J11)` = col_number(),
##   `Pneumonia, Influenza, and COVID-19 Deaths` = col_number(),
##   Footnote = col_character()
## )
# Second CDC download, stored as N_death_race.
N_death_race <- read_csv(
  file = "https://data.cdc.gov/api/views/pj7m-y5uh/rows.csv?accessType=DOWNLOAD&bom=true&format=true"
)
## Parsed with column specification:
## cols(
##   `Data as of` = col_character(),
##   State = col_character(),
##   Indicator = col_character(),
##   `Non-Hispanic White` = col_double(),
##   `Non-Hispanic Black or African American` = col_double(),
##   `Non-Hispanic American Indian or Alaska Native` = col_double(),
##   `Non-Hispanic Asian` = col_double(),
##   `Hispanic or Latino` = col_double(),
##   Other = col_double(),
##   Footnote = col_character()
## )
# Source for the detailed US data: https://github.com/nytimes/covid-19-data
# Idea: count and plot how many articles about the coronavirus were released over time, and how many such articles there are in total: https://developer.nytimes.com/docs/articlesearch-product/1/overview